# YAML configuration file for the analysis
# Global settings for the analysis. All keys in this section are flat,
# top-level scalars.

# maximum number of CPUs that any single rule may use
max_cpus: 16
# output directories (all paths below are under `results/`)
fastq10x_dir: results/fastq10x # FASTQs & QC for 10x transcriptomic runs
mkfastq10x_dir: results/fastq10x/mkfastq_output # `cellranger mkfastq` output
genome_dir: results/genomes # location of downloaded genomes and annotations
refgenome: results/genomes/refgenome # STAR reference genome directory
aligned_fastq10x_dir: results/aligned_fastq10x # aligned 10x transcriptomic reads
# The next two output directories are commented out, i.e. currently unset.
#viral_fastq10x_dir: results/viral_fastq10x # viral tags / barcodes in 10X reads
#analysis_dir: results/analysis # fine-grained analyses
# FTP locations of the cellular genome (FASTA) and its GTF annotation.
# Both URLs point at Ensembl release 98, Canis familiaris, CanFam3.1.
cell_genome_ftp: ftp://ftp.ensembl.org/pub/release-98/fasta/canis_familiaris/dna/Canis_familiaris.CanFam3.1.dna.toplevel.fa.gz
cell_gtf_ftp: ftp://ftp.ensembl.org/pub/release-98/gtf/canis_familiaris/Canis_familiaris.CanFam3.1.98.gtf.gz
# viral genome (FASTA), GTF, and Genbank file locations (paths under `data/`)
viral_genome: data/flu_sequences/flu-CA09.fasta
viral_gtf: data/flu_sequences/flu-CA09.gtf
viral_genbank: data/flu_sequences/flu-CA09.gb
# file giving nucleotide identities at viral tag sites (commented out, unset)
#viraltag_identities: data/flu_sequences/flu-CA09_viral_tags.yaml
# STAR alignment parameters for transcriptomics. Reduce the penalty for
# non-canonical splice sites. This is probably bad for mapping cellular
# reads, but good for mapping viral reads, which will have deletions
# that do not correspond to splice sites.
# NOTE(review): STAR's documented defaults are believed to be -8 / -4 / -8
# for these three options; confirm against the STAR version in use.
scoreGapNoncan: -4
scoreGapGCAG: -4
scoreGapATAC: -4
# URL of the 10X cell-barcode whitelist: **this is for the v3 kit**
# NOTE(review): the URL tracks the `master` branch rather than a fixed
# revision, so the downloaded file could change over time; confirm this is
# acceptable.
cb_whitelist_10x_url: https://github.com/10XGenomics/cellranger/raw/master/lib/python/cellranger/barcodes/3M-february-2018.txt.gz
# local path for the whitelist (a `.txt` file under the aligned-reads directory)
cb_whitelist_10x: results/aligned_fastq10x/cb_whitelist_10x.txt
cb_len_10x: 16 # length of 10X cell barcode
umi_len_10x: 12 # length of 10X UMI: **this is for the v3 kit**
#----------------------------------------------------------------------------
# configuration of experiments
#----------------------------------------------------------------------------
# Structure, one entry per experiment name:
#
#   <experiment>:
#     description: free-text description of the experiment
#     expect_ncells: integer (presumably the expected number of cells;
#                    confirm against the consuming workflow)
#     transcriptomics:
#       <run date>:
#         index: sample index name (looks like a 10x index set; confirm)
#         bcl_folder: directory holding the sequencer's BCL output
#         lane: lane number, or '*' (presumably "all lanes"; confirm)
#
# Nesting is significant: without the indentation below, the repeated keys
# (`description`, `transcriptomics`, `index`, ...) collide at the top level.
#
# The run-date keys are deliberately left unquoted, as in the original.
# NOTE(review): YAML 1.1 loaders such as PyYAML resolve unquoted
# `YYYY-MM-DD` scalars to date objects rather than strings; confirm what the
# consuming code expects before quoting them.
experiments:
  hashing_wt_rapidpilot:
    description: >-
      Single-cell transcriptomics using a small-scale pilot rescue
      of the wildtype viral tag variant. Lab notes are at
      https://benchling.com/s/etr-Q28fCd1kprRNxAd0v5Hg
    expect_ncells: 2000
    transcriptomics:
      2019-12-03:
        index: SI-GA-A3
        bcl_folder: /shared/ngs/illumina/bloom_lab/191203_M03100_0504_000000000-CNCN9/
        lane: '*'
  hashing_trial1:
    description: >-
      Single-cell transcriptomics using the wildtype and dblSyn viral
      tag variants. Infection volume was chosen based on HA expression
      measured by flow cytometry. Lab notes are at
      https://benchling.com/s/etr-i9I0yHiFb0P8wHCxosim
    expect_ncells: 2000
    transcriptomics:
      2020-01-16:
        index: SI-GA-A4
        bcl_folder: /shared/ngs/illumina/bloom_lab/200128_M03100_0528_000000000-CRC4G/
        lane: '*'
      2020-02-18:
        index: SI-GA-A4
        bcl_folder: /shared/ngs/illumina/bloom_lab/200218_D00300_0910_AHCHHJBCX3/Raw/
        lane: 2
  hashing_trial2:
    description: >-
      Single-cell transcriptomics using the wildtype and dblSyn viral
      tag variants. Infection volume was chosen based on the results
      of `hashing_trial1` and flow cytometry. The inoculum volume for
      the wildtype tag variant was about 12-fold higher, and for the
      dblSyn tag variant was about 24-fold higher.
    expect_ncells: 2000
    transcriptomics:
      2020-06-02:
        index: SI-GA-B3
        # NOTE(review): this is the only run rooted at /home/solexa rather
        # than /shared; confirm the path is still valid.
        bcl_folder: /home/solexa/ngs/illumina/bloom_lab/200602_M03100_0578_000000000-J3464/
        lane: '*'
      2020-07-01:
        index: SI-GA-B3
        bcl_folder: /shared/ngs/illumina/agreaney/200701_D00300_0988_BHGFGMBCX3/raw/200701_D00300_0988_BHGFGMBCX3/
        lane: 2
      2020-08-26:
        index: SI-GA-B3
        bcl_folder: /shared/ngs/illumina/bloom_lab/bloom_lab/200826_D00300_1040_BHHGLWBCX3/raw/200826_D00300_1040_BHHGLWBCX3/
        lane: '*'
  hashing_trial3_withNH4Cl:
    description: None provided.
    expect_ncells: 2000
    transcriptomics:
      2020-07-24:
        index: SI-GA-B5
        bcl_folder: /shared/ngs/illumina/bloom_lab/200724_M03100_0593_000000000-J33YK/
        lane: '*'
      2020-08-26:
        index: SI-GA-B5
        bcl_folder: /shared/ngs/illumina/bloom_lab/bloom_lab/200826_D00300_1040_BHHGLWBCX3/raw/200826_D00300_1040_BHHGLWBCX3/
        lane: '*'
  hashing_trial3_noNH4Cl:
    description: None provided
    expect_ncells: 2000
    transcriptomics:
      2020-07-24:
        index: SI-GA-B4
        bcl_folder: /shared/ngs/illumina/bloom_lab/200724_M03100_0593_000000000-J33YK/
        lane: '*'
  scProgenyProduction_trial1:
    description: None provided.
    expect_ncells: 2000
    transcriptomics:
      2020-09-25:
        index: SI-GA-B8
        bcl_folder: /shared/ngs/illumina/bloom_lab/200925_D00300_1065_AHHL7NBCX3/raw/200925_D00300_1065_AHHL7NBCX3/
        lane: 2
  scProgenyProduction_trial2:
    description: None provided.
    expect_ncells: 2000
    transcriptomics:
      2020-09-25:
        index: SI-GA-B9
        bcl_folder: /shared/ngs/illumina/bloom_lab/200925_D00300_1065_AHHL7NBCX3/raw/200925_D00300_1065_AHHL7NBCX3/
        lane: '*'